package com.kdtech.analyse.news;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.kdtech.analyse.AnalyseNews;
import com.kdtech.analyse.JSoupUtils;
import com.kdtech.crawler.CrawlHTML;
import com.kdtech.entity.crawler.UrlMeta;
import com.kdtech.entity.data.NewsMeta;
import com.kdtech.utils.DateUtils;
import com.kdtech.utils.HtmlCleaner;
import com.kdtech.utils.StringUtils;

/**
 * ZjolNewsAnalyse - news parser for Zhejiang Online (zjol.com.cn) pages.
 *
 * <p>Decides whether a URL is an article detail page and extracts title, body,
 * source/author and publication date from the page HTML, first with selectors
 * chosen by URL prefix and then with generic fallback selectors.
 *
 * <p>xiaonie 2012-11-21
 */

public class ZjolNewsAnalyse implements AnalyseNews {

	/** Label searched for when extracting the source/author. */
	private static final String SOURCE_LABEL = "来源：";

	/**
	 * Patterns of article detail URLs, compiled once instead of on every call.
	 * NOTE(review): the dots are unescaped and therefore match any character;
	 * the expression is kept unchanged so the set of accepted URLs stays the same.
	 */
	private static final Pattern[] DETAIL_PAGE_PATTERNS = {
			Pattern.compile("http://.*.zjol.com.cn/.*system/[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]+.shtml.*"),
	};

	/** Title selectors tried in order for URLs without a dedicated branch. */
	private static final String[] DEFAULT_TITLE_SELECTORS = {
			"div.contentTitle",
			"#content .news_detail div.title",
			"td.tit",
			"h1",
			"td.title_gray24",
			".contTit",
			"div.main_template_middle_title",
			"div#thistitle",
	};

	/** Body selectors tried in order for URLs without a dedicated branch. */
	private static final String[] DEFAULT_CONTENT_SELECTORS = {
			"#content .news_detail div.detail",
			"div#ArticleCnt",
			"td.detail",
			"#oZoom",
			"#contents",
	};

	/** Last-resort body selectors, tried in order when the body is still blank. */
	private static final String[] GENERIC_CONTENT_SELECTORS = {
			"div.main_template_middle_content",
			"div.contTxt",
			"div#content",
			"span#article",
			"span.intro",
			"div.content",
			"td.bte",
			"#fontzoom",
			"div.artical_real",
			"#attile",
			"td.word",
			"#artibody",
			"td.style_p2",
			"div.a_body",
			"td.xz2",
			"div.art-con",
			"div#zoom",
			"div.detail table p",
	};

	/** Last-resort date selectors, tried in order when no date was found. */
	private static final String[] GENERIC_DATE_SELECTORS = {
			"div.main_template_middle_source,td.title3,h3",
			"div.time",
			"div#info",
			"div.info",
			"div.infor",
			"div.riqi",
			"td.gongneng01",
			"div.artile_info",
			"div.huise",
			"#pub_date",
			"span.a_time",
			"div.tit2",
			"div.art-time",
			"td.ti",
	};

	/** Mutable holder for the values collected while walking the document. */
	private static final class Extracted {
		String title;
		String content;
		String author;
		Long date;
		/** Raw text that may contain the date; parsed later if {@link #date} is still null. */
		String dateText;
	}

	/**
	 * Tells whether the URL matches one of the article detail-page patterns.
	 *
	 * @param url URL to test; {@code null} is treated as "not a detail page"
	 * @return {@code true} if the whole URL matches a detail-page pattern
	 */
	public boolean isDetailPage(String url) {
		if (url == null) {
			return false;
		}
		for (Pattern pattern : DETAIL_PAGE_PATTERNS) {
			if (pattern.matcher(url).matches()) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Extracts the news fields from the HTML carried by {@code urlMeta}.
	 *
	 * <p>The URL is not required to be a detail page; any page with HTML is parsed.
	 * If the page contains a meta-refresh with a {@code URL=} target, that target is
	 * fetched and parsed instead, while the reported URL stays the original one.
	 *
	 * @param urlMeta crawled page (URL and HTML)
	 * @return a populated {@link NewsMeta}, or an empty one when {@code urlMeta},
	 *         its URL or its HTML is missing or the HTML is empty
	 */
	public NewsMeta parserHtml(UrlMeta urlMeta) {
		NewsMeta news = new NewsMeta();
		if (urlMeta == null) {
			return news;
		}
		String html = urlMeta.getHtml();
		String url = urlMeta.getUrl();
		// Guard before parsing: Jsoup cannot parse null, and the emptiness test
		// must compare content (isEmpty), not references.
		if (html == null || html.isEmpty() || url == null) {
			return news;
		}

		Document doc = Jsoup.parse(html);
		stripNoise(doc);
		Document redirected = followMetaRefresh(doc);
		if (redirected != doc) {
			// The redirect target needs the same clean-up as the original page.
			stripNoise(redirected);
			doc = redirected;
		}

		Extracted out = new Extracted();
		extractBySite(url, doc, out);
		applyGenericFallbacks(url, doc, out);

		news.setUrl(url);
		news.setContent(out.content);
		news.setTitle(out.title);
		news.setAuthor(out.author);
		news.setClickNum(null);
		news.setCommentNum(null);

		news.setDate(out.date);
		news.setType(1);
		return news;
	}

	/** Removes the sidebar, list and notice elements from the document. */
	private static void stripNoise(Document doc) {
		doc.select(".rightsider").remove();
		doc.select("#list,div#notice").remove();
	}

	/**
	 * Resolves a meta-refresh redirect such as
	 * {@code <meta http-equiv="refresh" content="0;URL=http://zjnews.zjol.com.cn/system/2014/04/02/019945076.shtml">}.
	 *
	 * @return the parsed redirect target, or {@code doc} itself when there is no
	 *         {@code URL=} target or the target could not be fetched
	 */
	private static Document followMetaRefresh(Document doc) {
		String refresh = doc.select("meta[http-equiv=refresh]").attr("content");
		if (refresh == null || refresh.indexOf("URL=") == -1) {
			return doc;
		}
		UrlMeta fetched = CrawlHTML.responseToURL(StringUtils.substringAfter(refresh, "URL="));
		if (fetched == null || fetched.getHtml() == null) {
			return doc;
		}
		return Jsoup.parse(fetched.getHtml());
	}

	/** Fills {@code out} using the selectors that belong to the URL's sub-site. */
	private static void extractBySite(String url, Document doc, Extracted out) {
		if (url.startsWith("http://gotrip.zjol.com.cn") || url.startsWith("http://edu.zjol.com.cn")) {
			extractGotripOrEdu(url, doc, out);
		} else if (url.startsWith("http://ks.zjol.com.cn")) {
			out.title = doc.select("div.main_title").text();
			out.content = HtmlCleaner.getContentHtml(url, doc.select("div.main_content"));
			out.dateText = doc.select("div.date").text();
		} else if (url.startsWith("http://health.zjol.com.cn")) {
			out.title = doc.select("div.big_tit").text();
			out.content = HtmlCleaner.getContentHtml(url, doc.select("div.main_con"));
			out.dateText = doc.select("div.source").text();
		} else if (url.startsWith("http://ent.zjol.com.cn")) {
			out.title = doc.select("h3").text();
			out.content = HtmlCleaner.getContentHtml(url, doc.select("div#ArticleCnt"));
			out.dateText = doc.select("p[align=center]").text();
		} else if (url.startsWith("http://315.zjol.com.cn")) {
			out.title = doc.select("h2").text();
			out.content = HtmlCleaner.getContentHtml(url, doc.select("div.gs_t_c_m_p"));
			out.dateText = doc.select("div.gs_t_info_s.fl").text();
		} else if (url.startsWith("http://sznews.zjol.com.cn/")) {
			out.title = doc.select("span.sz3").text();
			if (StringUtils.isBlank(out.title)) {
				out.title = StringUtils.substringBefore(doc.select("title").text(), "--");
			}
			out.content = firstNonBlankContent(url, doc, "span.sz23", "span.STYLE20");
			out.date = DateUtils.matchDate(doc.select("span.sz1").text());
			// Fall back only when span.sz1 gave no date; the previous guard tested
			// an always-null string and so always discarded the span.sz1 result.
			if (out.date == null) {
				out.date = DateUtils.matchDate(doc.select("div[align=center]").text());
			}
			out.author = JSoupUtils.matchAuthor(doc, SOURCE_LABEL);
		} else if (url.startsWith("http://syxww.zjol.com.cn")) {
			out.title = doc.select("p.STYLE1").text();
			out.content = HtmlCleaner.getContentHtml(url, doc.select("span.p_1"));
			out.date = DateUtils.matchDate(doc.select("p.p2").text());
			out.author = JSoupUtils.matchAuthor(doc, SOURCE_LABEL);
		} else if (url.startsWith("http://zjnews.zjol.com.cn")) {
			out.title = doc.select("div.contTit").text();
			out.content = HtmlCleaner.getContentHtml(url, doc.select("span.p_1"));
			out.date = DateUtils.matchDate(doc.select("div.time").text());
			out.author = JSoupUtils.matchAuthor(doc, SOURCE_LABEL);
		} else if (url.startsWith("http://scnews.zjol.com.cn")) {
			out.title = doc.select(".content h2").text();
			out.content = HtmlCleaner.getContentHtml(url, doc.select(".content .p_1 p"));
			out.date = JSoupUtils.matchDate(doc, "来源：");
			out.author = JSoupUtils.matchAuthor(doc, SOURCE_LABEL);
		} else if (url.startsWith("http://iptvlm.zjol.com.cn")) {
			out.title = doc.select("h1").text();
			out.content = HtmlCleaner.getContentHtml(url, doc.select("#textbox p"));
			out.date = JSoupUtils.matchDate(doc, "时间：");
			out.author = JSoupUtils.matchAuthor(doc, SOURCE_LABEL);
		} else if (url.startsWith("http://jnnews.zjol.com.cn")) {
			out.title = StringUtils.substringBefore(doc.select("title").text(), "--");
			out.content = HtmlCleaner.getContentHtml(url, doc.select(".content p"));
			out.date = JSoupUtils.matchDate(doc, "来源：");
			out.author = JSoupUtils.matchAuthor(doc, SOURCE_LABEL);
		} else if (url.startsWith("http://china.zjol.com.cn")) {
			out.title = doc.select("div.contTit").text();
			out.content = HtmlCleaner.getContentHtml(url, doc.select(".contTxt"));
			out.date = JSoupUtils.matchDate(doc, "来源：");
			out.author = JSoupUtils.matchAuthor(doc, SOURCE_LABEL);
		} else if (url.startsWith("http://photo.zjol.com.cn/")) {
			Elements headings = doc.select("h1");
			// Drop nested <span> elements so their text is not part of the title.
			headings.select("span").remove();
			out.title = headings.text();
			out.content = HtmlCleaner.getContentHtml(url, doc.select("div.wrap_text"));
			out.date = DateUtils.matchDate(doc.select(".time").text());
			out.author = JSoupUtils.matchAuthor(doc, SOURCE_LABEL);
		} else {
			out.title = firstNonBlankText(doc, DEFAULT_TITLE_SELECTORS);
			out.content = firstNonBlankContent(url, doc, DEFAULT_CONTENT_SELECTORS);
			out.date = DateUtils.matchDate(doc.select("div.contentDateCopyright,td.zuozhe13,.date").text());
		}
	}

	/**
	 * Extraction for gotrip/edu pages. If the primary layout throws, edu pages are
	 * retried with a second layout; failures are printed and extraction continues
	 * with whatever was collected so the generic fallbacks can still run.
	 */
	private static void extractGotripOrEdu(String url, Document doc, Extracted out) {
		try {
			out.title = doc.select("td.tit").text();
			out.content = HtmlCleaner.getContentHtml(url, doc.select("div.detail"));
			if (out.content == null || out.content.trim().length() == 0) {
				out.content = HtmlCleaner.getContentHtml(url, doc.select("div.detail > table > tr:eq(4)"));
			}
			out.dateText = doc.select("td.date").text();
		} catch (Exception e) {
			if (url.startsWith("http://edu.zjol.com.cn")) {
				try {
					out.title = doc.select("div.px24").text();
					out.content = HtmlCleaner.getContentHtml(url, doc.select("div.edu14"));
					out.dateText = doc.select("div[align=center]:eq(2)").text();
				} catch (Exception e1) {
					e1.printStackTrace();
				}
			} else {
				e.printStackTrace();
			}
		}
	}

	/** Fills whatever is still missing in {@code out} using site-independent selectors. */
	private static void applyGenericFallbacks(String url, Document doc, Extracted out) {
		if (StringUtils.isBlank(out.title)) {
			out.title = StringUtils.substringBefore(doc.select("title").text(), "-");
		}

		if (out.date == null) {
			out.date = DateUtils.matchDate(out.dateText);
		}
		if (out.date == null) {
			out.date = JSoupUtils.matchDate(doc, "来源", "时间：");
		}
		if (out.date == null) {
			out.date = firstDate(doc, GENERIC_DATE_SELECTORS);
		}
		if (out.date == null) {
			// Detail URLs embed the date as /yyyy/MM/dd/.
			out.date = DateUtils.matchDate(url);
		}

		if (StringUtils.isBlank(out.content)) {
			out.content = firstNonBlankContent(url, doc, GENERIC_CONTENT_SELECTORS);
		}

		if (StringUtils.isBlank(out.author)) {
			out.author = JSoupUtils.matchAuthor(doc, SOURCE_LABEL);
		}
		if (StringUtils.isBlank(out.author)) {
			out.author = StringUtils.substringBetween(doc.select("td.time").text(), "com.cn", "来源：");
		}
		if (StringUtils.isBlank(out.author)) {
			out.author = StringUtils.substringAfter(doc.select("div.art-time").text(), "  ");
		}
		if (StringUtils.isBlank(out.author)) {
			out.author = doc.select("font.dq").text();
		}
	}

	/**
	 * Returns the text of the first selector that yields non-blank text, or the
	 * text of the last selector tried when all are blank.
	 */
	private static String firstNonBlankText(Document doc, String... selectors) {
		String text = null;
		for (String selector : selectors) {
			text = doc.select(selector).text();
			if (!StringUtils.isBlank(text)) {
				return text;
			}
		}
		return text;
	}

	/**
	 * Returns the cleaned content of the first selector that yields a non-blank
	 * result, or the result of the last selector tried when all are blank.
	 */
	private static String firstNonBlankContent(String url, Document doc, String... selectors) {
		String content = null;
		for (String selector : selectors) {
			content = HtmlCleaner.getContentHtml(url, doc.select(selector));
			if (!StringUtils.isBlank(content)) {
				return content;
			}
		}
		return content;
	}

	/** Returns the first date matched in the text of the given selectors, or {@code null}. */
	private static Long firstDate(Document doc, String... selectors) {
		for (String selector : selectors) {
			Long matched = DateUtils.matchDate(doc.select(selector).text());
			if (matched != null) {
				return matched;
			}
		}
		return null;
	}

	/** Manual check: fetches one article URL, then prints the detail-page test and the parsed result. */
	public static void main(String[] args) {
		ZjolNewsAnalyse a = new ZjolNewsAnalyse();
		String url = "http://zjnews.zjol.com.cn/system/2014/08/14/020197384.shtml";
		UrlMeta meta = CrawlHTML.responseToURL(url);
		System.out.println(a.isDetailPage(url));
		NewsMeta parserHtml = a.parserHtml(meta);
		System.out.println(parserHtml);
	}

	/**
	 * Not implemented for this site.
	 *
	 * @param meta previously parsed news item
	 * @return always {@code null}
	 */
	public NewsMeta Update(NewsMeta meta) {
		return null;
	}

	/**
	 * @return always {@code false}
	 */
	public boolean isNeedUpdate() {
		return false;
	}
}
